home *** CD-ROM | disk | FTP | other *** search
- /* file doc_buf.c ... by ^z 870830-0919-...
- * functions to load in text from the document file and then
- * save out keys and pointers to the *.k and *.p files ...
- * modified 871007-... to unify the doc-buffer loading with the
- * character-filtering and the pointer array building....
- */
-
-
- #include <stdio.h>
- #include <unix.h>
- #include <storage.h>
- #include <strings.h>
- #include <ctype.h>
- #include <proto.h>
- #include "qndxr.2.h"
-
-
/* Allocate a buffer of bufsiz bytes for the caller to use.
 *
 * Returns a pointer to the new buffer.  This function never returns
 * NULL: if the request is negative, if it cannot be passed to the
 * allocator without losing bits, or if the allocator itself fails,
 * a message is printed and the program exits with status 1.
 */

char *make_buf (bufsiz)
    long bufsiz;
{
    char *buf, *malloc(), *mlalloc();
    void exit();

    DEBUG ("--allocating a buffer, size = %ld\n", bufsiz);

    /* stays NULL (and so falls into the fatal-error path below) unless
     * an allocation is actually attempted and succeeds
     */
    buf = NULL;

    if (bufsiz >= 0)
    {
#ifdef LIGHTSPEED
        buf = mlalloc (bufsiz);
#else
        /* the size is narrowed to unsigned int on its way to malloc();
         * a long that does not survive that round trip would be
         * silently truncated and give the caller a buffer smaller than
         * it asked for, so refuse the request instead
         */
        if ((long)(unsigned int)bufsiz == bufsiz)
            buf = malloc ((unsigned int)bufsiz);
#endif
    }

    if (buf == NULL)
    {
        printf ("\nFatal error in attempt to allocate a buffer!\n");
        printf ("(bufsiz=%ld)\n", bufsiz);
        exit(1);
    }

    return (buf);
}
-
-
- /* function to load the document buffer ... bring in doc_bufsiz
- * characters, and then enough more to finish out the final word,
- * followed by a terminal delimiter .... as the characters are read
- * in, filter them appropriately (depending on user choices) and
- * build the pointer array in memory to the first character of each
- * word ... return the total number of words that were
- * read in to the buffer (zero if we're finished with the file)
- *
- * ... note that one must be sure to pull in and throw away
- * any excess characters beyond KEY_LENGTH in the final word, so that
- * the remaining fragment doesn't show up as the first "word" in the
- * next chunk of the file....
- *
- * Routine modified 871007-... in order to unify the buffer-loading and
- * character-filtering and pointer-array-building operations, and to go
- * back to using getc() from <stdio> rather than Macintosh-specific
- * operations for loading the buffer....
- */
-
- long load_doc_buffer (doc, doc_bufsiz, ptr)
- char *doc, **ptr;
- long doc_bufsiz;
- {
- int c, i, in_a_word = FALSE;
- char **ptr0, *end_doc_buf;
- extern FILE *doc_file;
-
- DEBUG ("--Loading document buffer...\n", NULL);
-
- ptr0 = ptr;
- end_doc_buf = doc + doc_bufsiz;
-
- while (doc < end_doc_buf)
- {
- c = filtered_getc ();
- DEBUG ("--filtered character = \"%c\"\n", c);
- if (c == EOF)
- {
- *doc++ = '\0';
- in_a_word = FALSE;
- break;
- }
- if (! c)
- in_a_word = FALSE;
- else if (! in_a_word)
- {
- *ptr++ = doc;
- in_a_word = TRUE;
- DEBUG ("--adding new ptr = %ld\n", doc);
- }
- *doc++ = c;
- }
-
- if (doc == end_doc_buf && in_a_word)
- {
- DEBUG ("--finishing off a final buffer word...\n", NULL);
- for (i = 0; i < KEY_LENGTH; ++i)
- {
- c = filtered_getc ();
- if (c == EOF)
- {
- *doc++ = '\0';
- break;
- }
- if (! c)
- {
- *doc++ = '\0';
- break;
- }
- *doc++ = c;
- }
- if (i == KEY_LENGTH)
- while (filtered_getc ())
- ;
- }
-
- return (ptr - ptr0);
- }
-
-
/* Read the next character from doc_file and filter it according to
 * the user's keep_* settings.  Returns:
 *      EOF     at end of file;
 *      '\0'    if the character counts as a delimiter;
 *      otherwise the character itself (lower-case letters are
 *              returned as upper-case).
 *
 * Letters and digits are always kept and white space is always a
 * delimiter.  A non-ASCII character is kept when keep_special_chars
 * is set.  Punctuation is kept when keep_all_punct is set; when only
 * keep_embedded_punct is set it is kept just if the previous call did
 * not return a delimiter and the character that follows in the file
 * is alphanumeric (or non-ASCII with keep_special_chars set).
 * Everything else is a delimiter.
 *
 * The result of each call is remembered in a static variable so the
 * next call can tell what preceded its character.
 */

int filtered_getc ()
{
    static int cur = '\0';      /* result of the most recent call */
    int before, ahead;
    extern int keep_all_punct, keep_embedded_punct, keep_special_chars;
    extern FILE *doc_file;

    before = cur;
    cur = getc (doc_file);

    if (cur == EOF)
        return (EOF);

    if (islower (cur))
    {
        cur = toupper (cur);
        return (cur);
    }

    if (isupper (cur) || isdigit (cur))
        return (cur);

    if (isspace (cur))
    {
        cur = '\0';
        return (cur);
    }

    if (keep_special_chars && ! isascii (cur))
        return (cur);

    if (ispunct (cur))
    {
        if (keep_all_punct)
            return (cur);

        /* embedded punctuation needs a kept character on both sides;
         * only peek at the file when the left side already qualifies
         */
        if (keep_embedded_punct && before != '\0')
        {
            ahead = getc (doc_file);
            ungetc (ahead, doc_file);
            if (ahead != EOF
                && (isalnum (ahead)
                    || (keep_special_chars && ! isascii (ahead))))
                return (cur);
        }
    }

    /* anything not kept above is treated as a delimiter */
    cur = '\0';
    return (cur);
}
-
-
-